// Start from an empty session (drops data, programs, stored results, etc.)
clear all

// PATH MACROS
// Input and Output currently resolve to the same derived-data directory.
global Input  "Y:/limited/Michigan_CTE/funding_change/data_derived"
global Output "Y:/limited/Michigan_CTE/funding_change/data_derived"

// LOAD BASE DATA
// g9_cohorts.dta is the master file for every merge that follows.
use "${Input}/g9_cohorts.dta"

// MERGE DATA
// Unless noted, each merge keeps master-only and matched rows and discards
// rows that appear only in the using file; no _merge variable is retained.

// CTE enrollment
// NOTE(review): unlike the merges below, this one has no keep() option, so
// rows found only in cte_advancement.dta are retained - confirm intended.
merge 1:1 student using "${Input}/cte_advancement.dta", nogenerate

// Math & reading score in Grade 8
merge 1:1 student using "${Input}/math_reading_score_g8.dta", keep(master match) nogenerate

// ACT & SAT score in Grade 11
merge 1:1 student using "${Input}/act_sat_score_g11.dta", keep(master match) nogenerate

// NSC college outcomes (keyed on student and grad_year)
merge 1:1 student grad_year using "${Input}/nsc_enrollment.dta", keep(master match) nogenerate

// School-level characteristics (keyed on school and cohort9)
merge m:1 school cohort9 using "${Input}/school_level.dta", keep(master match) nogenerate

// Census block
// The merge key is named censusblockgroup, so rename the master's key first.
rename census_block censusblockgroup
merge m:1 censusblockgroup using "${Input}/census_block.dta", keep(master match) nogenerate

// Millage
// The merge is keyed on dcode/year, so temporarily rename the master's
// district and grad_year to match, then restore the original names afterwards.
ren grad_year year
ren district dcode
// keep(1 3) drops rows found only in the millage file (previously done with
// "drop if _m==2"); nogen prevents a leftover _merge variable from being
// carried into the saved analysis dataset.
merge m:1 dcode year using "Y:\limited\Michigan_CTE\funding_change\data_raw\dcode_cte_millage.dta", keep(1 3) nogen
// Rows with missing revenue_pp (including those without a millage match) are set to 0
recode revenue_pp .=0
ren year grad_year
ren dcode district

// CREATE ANALYSIS SAMPLE

// Smallest enroll value observed within each school (scratch variable)
bys school: gegen min_enroll = min(enroll)

// School-type flags
// small: minimum enrollment below 50, or no enrollment information at all
generate small       = (min_enroll < 50) | (min_enroll == .)
generate trad_voc    = inlist(school_type, 1, 3)
generate state       = inlist(entity, 7, 10, 11)
generate isdschool   = inlist(entity, 2, 3)
// NOTE(review): Stata orders missing above every number, so a missing level
// (or a missing high_grade when level==2) yields high_school = 1 - confirm intended.
generate high_school = (level >= 3) | (level == 2 & high_grade >= 9)

drop min_enroll

// Sample restriction actually applied here: remove schools flagged state or isdschool
drop if state == 1 | isdschool == 1

// NOTE(review): the flags small, trad_voc and high_school are created above
// but are not used to drop observations in this file. The original comments
// listed the following intended restrictions; confirm where they are applied:
//   - Keep regular and vocational schools (drop alternative and special ed)
//   - Drop state, charter, and private schools
//   - Drop elementary schools and middle schools with no HS grades
//   - Drop small schools which ever have < 50 students enrolled

// Indicators for district code 82010 and ISD code 82000
generate detroit     = (district == 82010)
generate detroit_isd = (isdcode == 82000)



// DEAL WITH MISSING VALUES

// REPLACE OUTCOMES WITH 0

// Temporarily rename cte_any_course* so the cte* wildcard below does not
// match them; their missing values are left untouched.
rename cte_any_course* x_any_course*

// Outcome variables whose system-missing values are recoded to 0
local outcome_vars cte* max_segment max_courses max_semesters first* *_by_* any_college two_yr* four_yr* public private took_test*
foreach v of varlist `outcome_vars' {
    replace `v' = 0 if `v' == .
}

// cte_best_score was zero-filled by the loop above; set it back to missing
// wherever cte_test_any is 0.
replace cte_best_score = . if cte_test_any == 0

// Restore the original cte_any_course* names
rename x_any_course* cte_any_course*

// UPDATE COVARIATE IMPUTATION FLAGS AND REPLACE WITH CONSTANT
// For each school characteristic: flag the row in schoolchar_imp when the
// value is system-missing, then fill the missing value with 0.
// (Original author's note on the fill step: 0 changes.)
local school_chars charter enroll fte_experience city suburb town_rural
foreach v of varlist `school_chars' {
    replace schoolchar_imp = 1 if `v' == .
    replace `v' = 0 if `v' == .
}

// Flag rows with missing cbg_baplus, then fill every flagged row with the
// mean of cbg_baplus among non-flagged rows of the same cohort9.
replace cbg_baplus_imp = 1 if cbg_baplus == .

// Loop over the cohorts actually present in the data rather than a
// hard-coded 2007-2016 range, so no cohort is silently left unimputed.
levelsof cohort9, local(cohort_levels)
foreach y of local cohort_levels {
	summarize cbg_baplus if cohort9 == `y' & cbg_baplus_imp == 0, meanonly
	// Reference r(mean) directly instead of through macro expansion: this
	// keeps full numeric precision and evaluates to missing when a cohort
	// has no non-imputed observations.
	replace cbg_baplus = r(mean) if cohort9 == `y' & cbg_baplus_imp == 1
}

// Grade 8 scores: set the existing <var>_imp flag to 1 where the score is
// missing, then replace the missing score with 0.
foreach score of varlist math_g8 read_g8 test_avg_g8 {
  replace `score'_imp = 1 if missing(`score')
  replace `score'     = 0 if missing(`score')
}

// The squared average has no flag of its own; only fill its missing values with 0
replace test_avg_sq_g8 = 0 if missing(test_avg_sq_g8)

// Grade 11 ACT/SAT scores: create a new <var>_imp flag equal to 1 where the
// score is system-missing, then replace the missing score with 0.
local g11_scores act_composite_g11 act_english_g11 act_reading_g11 act_math_g11 sat_composite_g11 sat_reading_g11 sat_math_g11
foreach score of varlist `g11_scores' {
  generate byte `score'_imp = (`score' == .)
  replace `score' = 0 if `score' == .
}

// Complement of cte_any
gen byte no_cte = 1 - cte_any



// CREATE COHORT-SCHOOL FIXED EFFECTS
// One group id per (cohort9, school) combination. Stored as long rather than
// int: an int holds at most 32,740, and ids beyond that would be set to
// missing. The compress before the final save shrinks the type again if the
// ids fit in a smaller one.
egen long cohort_bcode_fe = group(cohort9 school)

// CREATE COHORT INDICATORS
// One 0/1 variable c<year> for each distinct non-missing value of grad_year
levelsof grad_year, local(cohort_yr)
foreach x of local cohort_yr {
  gen byte c`x' = grad_year == `x'
}

// Create District means
// Collapse to one row per district on a restricted sample, save the result,
// then merge it back onto the full student-level data.
preserve

// Restrict to grad_year 2012-2014. District 84060 is kept for all years; per
// the original note, it is a major district that was created only after.
keep if inrange(grad_year, 2012, 2014) | district == 84060

// District-level means of econdis, cte_any, cte_comp and white, plus the
// median of d_urbanicity
gcollapse (mean) district_poor = econdis district_cte_any = cte_any district_cte_comp = cte_comp white (median) district_urbanicity = d_urbanicity, by(district)

// Convert the mean of white into its complement
generate district_pct_nonwhite = 1 - white
drop white

// Quartiles of district_poor across districts; flag the top quartile
xtile district_poor_quartile = district_poor, nquantiles(4)
generate district_most_poor = (district_poor_quartile == 4)

save "Y:/limited/Michigan_CTE/funding_change/data_derived/temp/district_characteristics.dta", replace
restore


// Attach the district characteristics to every student row
merge m:1 district using "Y:/limited/Michigan_CTE/funding_change/data_derived/temp/district_characteristics.dta", nogenerate
order district_*, after(schoolchar_imp)

// DROP VARIABLES
drop high_grade level

// FORMAT RIC
// Display student with a fixed 16-digit format and no decimals
format student %16.0f

// LABEL VARIABLES
label var district_poor "Percent economically disadvantaged in district (2012-2014)"
label var district_poor_quartile "Quartile of district poverty (2012-2014)"
label var district_most_poor "District in poorest quartile (2012-2014)"
label var district_cte_any "Percent CTE participants  in district (2012-2014)"
label var district_cte_comp "Percent CTE completers in district (2012-2014)"
label var no_cte "Did not participate in a CTE program"
label var cohort_bcode_fe "Cohort-school fixed effects"
label var grad_year "Expected Graduation Year"

// Label the cohort indicators c<year>. They were created from the distinct
// values of grad_year, so loop over those same values instead of a
// hard-coded 2010-2019 range (which errors if a year is absent from the data
// and leaves any year outside the range unlabeled).
levelsof grad_year, local(grad_years)
foreach x of local grad_years {
  capture confirm variable c`x', exact
  if !_rc {
    label var c`x' "Indicator- Expected Graduating Class of `x'"
  }
}

// SORT AND SAVE
sort cohort9 student
// Shrink storage types where possible before writing to disk
compress
save "Y:/limited/Michigan_CTE/funding_change/data_final/compiled_student_analysis_data.dta", replace
